{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n=====================================================\nMNIST classfification using multinomial logistic + L1\n=====================================================\n\nHere we fit a multinomial logistic regression with L1 penalty on a subset of\nthe MNIST digits classification task. We use the SAGA algorithm for this\npurpose: this a solver that is fast when the number of samples is significantly\nlarger than the number of features and is able to finely optimize non-smooth\nobjective functions which is the case with the l1-penalty. Test accuracy\nreaches > 0.8, while weight vectors remains *sparse* and therefore more easily\n*interpretable*.\n\nNote that this accuracy of this l1-penalized linear model is significantly\nbelow what can be reached by an l2-penalized linear model or a non-linear\nmulti-layer perceptron model on this dataset.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import time\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.utils import check_random_state\n\nprint(__doc__)\n\n# Author: Arthur Mensch <arthur.mensch@m4x.org>\n# License: BSD 3 clause\n\n# Turn down for faster convergence\nt0 = time.time()\ntrain_samples = 5000\n\n# Load data from https://www.openml.org/d/554\nX, y = fetch_openml('mnist_784', version=1, return_X_y=True)\n\nrandom_state = check_random_state(0)\npermutation = random_state.permutation(X.shape[0])\nX = X[permutation]\ny = y[permutation]\nX = X.reshape((X.shape[0], -1))\n\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, train_size=train_samples, test_size=10000)\n\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)\n\n# Turn up tolerance for faster convergence\nclf = LogisticRegression(\n    C=50. / train_samples, penalty='l1', solver='saga', tol=0.1\n)\nclf.fit(X_train, y_train)\nsparsity = np.mean(clf.coef_ == 0) * 100\nscore = clf.score(X_test, y_test)\n# print('Best C % .4f' % clf.C_)\nprint(\"Sparsity with L1 penalty: %.2f%%\" % sparsity)\nprint(\"Test score with L1 penalty: %.4f\" % score)\n\ncoef = clf.coef_.copy()\nplt.figure(figsize=(10, 5))\nscale = np.abs(coef).max()\nfor i in range(10):\n    l1_plot = plt.subplot(2, 5, i + 1)\n    l1_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',\n                   cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)\n    l1_plot.set_xticks(())\n    l1_plot.set_yticks(())\n    l1_plot.set_xlabel('Class %i' % i)\nplt.suptitle('Classification vector for...')\n\nrun_time = time.time() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}